


************************************************************************************
************************************************************************************

clear all
set maxvar 11000


* Reads in data and some additional parameters:
do "Code/read_in_analysis_data.do"

************************************************************************************
************************************************************************************

* outputs some parameers set by read_in_analysis_data.do 


* Outputs initial things:
* Bootstrap reps:
local bs_reps_str = string(bs_reps, "%8.0fc")
write_single_string "`bs_reps_str'", file("Paper/Figures/single_numbers/bs_rep_count.tex")

* lpoly bootstrapped reps:
local bs_reps_str = string(bs_reps_lpoly, "%8.0fc")
write_single_string "`bs_reps_str'", file("Paper/Figures/single_numbers/bs_rep_count_lpoly.tex")


* Social cost of 1 kg of methane -- method using White House numbers:
local methane_social_cost_per_kg = string(round(social_cost_methane_v2, .01), "%4.2fc")
write_single_string "`methane_social_cost_per_kg'", file("Paper/Figures/single_numbers/methane_social_cost_per_kg.tex")


	
	
*******************************************************************************
*
*     ANALYSIS
* 
********************************************************************************


* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 
* SINGLE NUMBERS:
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 

	* Count of total observations 
	count
	local sample_size = r(N)
	write_single_num `sample_size', file("Paper/Figures/single_numbers/count_total_sample.tex")
	
	* Count of total number in the original Wang paper:
	count if IncludedinAnalysis == "Yes"
	local wang_sample_size = r(N)
	write_single_num `wang_sample_size', file("Paper/Figures/single_numbers/count_wang_sample.tex")
	
	* Count of total number in the original Wang paper:
	count if IncludedinAnalysis == "No"
	local wang_sample_size = r(N)
	write_single_num `wang_sample_size', file("Paper/Figures/single_numbers/count_added_sample.tex")

	* Count of control:
	count if Treatment_Ind == 0
	local count_control = r(N)
	write_single_num `count_control', file("Paper/Figures/single_numbers/count_control.tex")
	
	
	* Count of treatment:
	count if Treatment_Ind == 1
	local count_treat = r(N)
	write_single_num `count_treat', file("Paper/Figures/single_numbers/count_treatment.tex")
	
	
	* Fraction of treatment:
	sum Treatment_Ind 
	local perc_treat = round(r(mean)*100)
	write_single_num `perc_treat', file("Paper/Figures/single_numbers/percent_treatment.tex")

	* Fraction in each treatment:
	* Annual
	sum Annual
	local perc_treat_annual = round(r(mean)*100)
	write_single_num `perc_treat_annual', file("Paper/Figures/single_numbers/percent_treatment_annual.tex")
	* Biannual
	sum Biannual
	local perc_treat_biannual = round(r(mean)*100)
	write_single_num `perc_treat_biannual', file("Paper/Figures/single_numbers/percent_treatment_biannual.tex")
	* Triiannual
	sum Triannual
	local perc_treat_triannual = round(r(mean)*100)
	write_single_num `perc_treat_triannual', file("Paper/Figures/single_numbers/percent_treatment_triannual.tex")
	
	* Count of cases with zero baseline leakage:
	count if Tot_emiss_Leak1==0
	local count = r(N)
	write_single_num `count', file("Paper/Figures/single_numbers/count_zero_baseline_leakage.tex")

	local frac_zero_baseline_leakage = round(100*`count'/`sample_size')
	write_single_num `frac_zero_baseline_leakage', file("Paper/Figures/single_numbers/frac_zero_baseline_leakage.tex")
	
	* Above median leakage
	qui sum Tot_emiss_Leak1, detail
	count if Tot_emiss_Leak1>=r(p50)
	local count = r(N)
	write_single_num `count', file("Paper/Figures/single_numbers/count_baseline_leakage_above_median.tex")

	* Below median but above zero leakage:
	qui sum Tot_emiss_Leak1, detail
	count if Tot_emiss_Leak1<r(p50) & Tot_emiss_Leak1>0
	local count = r(N)
	write_single_num `count', file("Paper/Figures/single_numbers/count_baseline_leakage_below_median_above_zero.tex")
	
	* Count of cases with zero baseline venting
	count if Tot_comp_Vent1==0
	local count = r(N)
	write_single_num `count', file("Paper/Figures/single_numbers/count_zero_baseline_venting.tex")

	* Zero baseline leakage and treatment / control group:
	count if Tot_emiss_Leak1 == 0 & Treatment_Ind == 1
	local count_treat_zeroleakage = r(N)
	write_single_num `count_treat_zeroleakage', file("Paper/Figures/single_numbers/count_treatment_zero_baseline_leakage.tex")
	
	count if Tot_emiss_Leak1 == 0 & Treatment_Ind == 0
	local count_control_zeroleakage = r(N)
	write_single_num `count_control_zeroleakage', file("Paper/Figures/single_numbers/count_control_zero_baseline_leakage.tex")
	

	* Count of sites with zero natural gas:
	count if ProdGas_1 == 0
	local count_zero_gas_at_baseline = r(N)
	write_single_num `count_zero_gas_at_baseline', file("Paper/Figures/single_numbers/count_zero_gas_at_baseline.tex")
	
	* Count of sites with non-missing natural gas:
	count if ~missing(ProdGas_1)
	local count_notmiss_gas_at_baseline = r(N)
	write_single_num `count_notmiss_gas_at_baseline', file("Paper/Figures/single_numbers/count_nonmissing_gas_at_baseline.tex")
	
	* Count of sites with missing natural gas:
	count if missing(ProdGas_1)
	local count_miss_gas_at_baseline = r(N)
	write_single_num `count_miss_gas_at_baseline', file("Paper/Figures/single_numbers/count_missing_gas_at_baseline.tex")
	
	
	* Fraction of wells that have non-zero natural conditional on not missing:
	local frac_positive_nat_gas = (`count_notmiss_gas_at_baseline' - `count_zero_gas_at_baseline')/`count_notmiss_gas_at_baseline'
	local perc_positive_nat_gas = round(`frac_positive_nat_gas'*100)
	write_single_num `perc_positive_nat_gas', file("Paper/Figures/single_numbers/perc_positive_nat_gas.tex")
	
	
	* Start years:
	sum Site_start_year, detail
	local median_start = r(p50)
	local min_start = r(min)
	local max_start = r(max)
	write_single_num `median_start', file("Paper/Figures/single_numbers/median_site_start.tex")
	write_single_num `min_start', file("Paper/Figures/single_numbers/min_site_start.tex")
	write_single_num `max_start', file("Paper/Figures/single_numbers/max_site_start.tex")
	
	
	* P-values to look at whether proportion in each treatment group skews too far from 0.25/0.25/0.25/0.25
	mean Annual 
	test Annual==0.25 
	local rp_annual = r(p)
	mean Biannual 
	test Biannual==0.25 
	local rp_biannual = r(p)
	local rp = round(r(p), 0.01)
	local rp = strofreal(`rp', "%03.2fc")
	mean Triannual 
	test Triannual==0.25 
	local rp_triannual = r(p)
	local rp = round(r(p), 0.01)
	local rp = strofreal(`rp', "%03.2fc")
	mean Control 
	test Control==0.25 
	local rp_control = r(p)
	local rp_min = min(`rp_annual', `rp_biannual', `rp_triannual', `rp_control')
	local rp_min = round(`rp_min', 0.01)
	local rp_min = strofreal(`rp_min', "%03.2fc")
	write_single_num `count', file("Paper/Figures/single_numbers/smallest_p_value_sample_treatment_groups.tex")
	
	* count of operators:
	tab Operator_1
	local opers_total = r(r)
	write_single_num `opers_total', file("Paper/Figures/single_numbers/total_operators_baseline.tex")

	* count with venting above the threshold (0.671 is the conversion factor for converting kg to cubic meters):
	tab Operator_1 if Emissionskgd_1 > 900*0.671
	local opers_exceeds_AERstandard_base = r(r)
	local sites_exceeds_AERstandard_base = r(N)

	tab Operator_5 if Emissionskgd_5 > 900*0.671
	local opers_exceeds_AERstandard_end = r(r)
	local sites_exceeds_AERstandard_end = r(N)
	
	write_single_num `opers_exceeds_AERstandard_base', file("Paper/Figures/single_numbers/opers_exceeds_AERstandard_base.tex")
	write_single_num `sites_exceeds_AERstandard_base', file("Paper/Figures/single_numbers/sites_exceeds_AERstandard_base.tex")
	write_single_num `opers_exceeds_AERstandard_end', file("Paper/Figures/single_numbers/opers_exceeds_AERstandard_end.tex")
	write_single_num `sites_exceeds_AERstandard_end', file("Paper/Figures/single_numbers/sites_exceeds_AERstandard_end.tex")
	
	
			
			
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 
* SUMMARY STATISTICS -- CONTINUOUS VARIABLES, INCLUDING OUTLIER INFO:
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 

		
		gen baseline_Gas = ProdGas_1
		gen baseline_Oil = ProdOil_1
		gen baseline_emission = Emissionskgd_1
		gen baseline_leakage = Tot_emiss_Leak1
		gen baseline_venting = Tot_emiss_Vent1
		label var baseline_Gas "Gas production"
		label var baseline_Oil "Oil production"
		label var baseline_emission "Emissions"
		label var baseline_leakage "Leakage"
		label var baseline_venting "Venting"

		gen endline_Gas = ProdGas_5
		gen endline_Oil = ProdOil_5
		gen endline_emission = Emissionskgd_5
		gen endline_leakage = Tot_emiss_Leak5
		gen endline_venting = Tot_emiss_Vent5
		label var endline_Gas "Gas production"
		label var endline_Oil "Oil production"
		label var endline_emission "EEmissions"
		label var endline_leakage "LLeakage"
		label var endline_venting "Venting"

		* summary statistics for paper
		estpost tabstat baseline_leakage baseline_venting baseline_Gas baseline_Oil endline_leakage endline_venting endline_Gas endline_Oil, ///
			c(stat) stat(mean min p50 p95 max)
		esttab using Paper/Figures/summary_stats_outliers.tex, replace ///
			cells("mean(fmt(%13.0fc)) min p50 p95 max ") nonumber ///
			nomtitle nonote noobs tex f label collabels("Mean " " Min " " Median "  " 95th " " Max ")
		filefilter "Paper/Figures/summary_stats_outliers.tex" "Paper/Figures/summary_stats_outliers_1.tex" , ///
			replace from("hline") to("hline Baseline: \BS\BS")
		filefilter "Paper/Figures/summary_stats_outliers_1.tex" "Paper/Figures/summary_stats_outliers.tex" , ///
			replace from("LLeakage") to("\BShline Endline: \BS\BS Leakage")
		erase "Paper/Figures/summary_stats_outliers_1.tex"	

		drop baseline_emission baseline_leakage baseline_venting endline_emission endline_leakage endline_venting baseline_Gas baseline_Oil endline_Gas endline_Oil

		
		
		* Additional output for single numbers:
		sum Tot_emiss_Leak1, detail
		local max_leakage = string(round(r(max)), "%8.0fc")
		write_single_string "`max_leakage'", file("Paper/Figures/single_numbers/leakage_max_at_baseline.tex")
		local mean_leakage = string(round(r(mean)), "%8.0fc")
		write_single_string "`mean_leakage'", file("Paper/Figures/single_numbers/leakage_mean_at_baseline.tex")
		local ratio_p95_p50 = string(round(r(p95)/r(p50), 0.1), "%5.1fc")
		write_single_string "`ratio_p95_p50'", file("Paper/Figures/single_numbers/leakage_ratio_p95_p50_baseline.tex")
		local ratio_max_p50 = round(r(max)/r(p50))
		write_single_num `ratio_max_p50', file("Paper/Figures/single_numbers/leakage_ratio_max_p50_baseline.tex")

		* Additional output for single numbers:
		sum Tot_emiss_Leak1, detail
		local ratio_p95_p50 = round(r(p95)/r(p50))
		write_single_num `ratio_p95_p50', file("Paper/Figures/single_numbers/venting_ratio_p95_p50_baseline.tex")
		
		
		* Additional output for single numbers:
		sum Tot_emiss_Vent1, detail
		local mean_venting = string(round(r(mean)), "%8.0fc")
		write_single_string "`mean_venting'", file("Paper/Figures/single_numbers/venting_mean_at_baseline.tex")

		* sum baseline_Oil
		sum ProdOil_1, detail
		local mean_oil = string(round(r(mean)), "%8.0fc")
		write_single_string "`mean_oil'", file("Paper/Figures/single_numbers/oil_mean_at_baseline.tex")
		
		* sum baseline_Oil
		sum ProdGas_1, detail
		local mean_gas = string(round(r(mean)), "%8.0fc")
		write_single_string "`mean_gas'", file("Paper/Figures/single_numbers/gas_mean_at_baseline.tex")

		
		gen prodvalue_1 = ProdOil_1 * 6.28981 * 67.75 + ProdGas_1 * 3.75
		sum prodvalue_1, detail 
		sum prodvalue_1 if prodvalue_1 > 0, detail 
		
		
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 
* BALANCE TABLE:
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 

		gen baseline_Gas = ProdGas_1
		gen baseline_Oil = ProdOil_1
		gen baseline_leakage = Tot_emiss_Leak1
		gen baseline_venting = Tot_emiss_Vent1
		label var baseline_Gas "Gas production"
		label var baseline_Oil "Oil production"
		label var baseline_leakage "Leakage"
		label var baseline_venting "Venting"
		label var Site_start_year "Site establishment year"

	* Creates two versions of table: One with and without the compare all three treatment groups:
	capture erase "Paper/Figures/balance_table.tex"
	capture erase "Paper/Figures/balance_table_TConly.tex"
	
	file open temp_file using "Paper/Figures/balance_table.tex", write
	file open temp_file2 using "Paper/Figures/balance_table_TConly.tex", write
	
	file write temp_file  " & Control & Treatment & Difference & All equal p-value \\ \hline " _newline
	file write temp_file2  " & Control & Treatment & Difference \\ \hline " _newline
	
	foreach var of varlist /*GasSite*/ Site_start_year baseline_leakage baseline_venting baseline_Gas baseline_Oil {
		
		* Gets variable label
		local lab: variable label `var'  // <- save variable label in local `lab' 
		
		* Gets mean
		mean `var', over(Treatment_Ind) cluster(Operator_1)
		
		local mean_control : di %03.2f `=e(b)[1,1]'
		local sd_control : di %03.2f `=e(sd)[1,1]'
		local mean_treat : di %03.2f `=e(b)[1,2]'
		local sd_treat : di %03.2f `=e(sd)[1,2]'
		
		* Gets difference:
		reg `var' Treatment_Ind, cluster(Operator_1)
		
		local mean_dif : di %03.2f `=e(b)[1,1]'
		local sd_dif : di %03.2f `= e(V)[1,1]^0.5'
		
		* Gets p value of test of all being equal:
		reg `var' Annual Biannual Triannual, cluster(Operator_1)
		test Annual==Biannual==Triannual==0
		local all_equal_pvalue : di %04.3f `=r(p)'
			
		di "`all_equal_pvalue'"
		
		* Writes output
		file write temp_file "  `lab' & `mean_control' & `mean_treat' & `mean_dif' & `all_equal_pvalue' \\ " _newline
		file write temp_file " & (`sd_control') & (`sd_treat') & (`sd_dif') & \\ " _newline
		
		file write temp_file2 "  `lab' & `mean_control' & `mean_treat' & `mean_dif' \\ " _newline
		file write temp_file2 " & (`sd_control') & (`sd_treat') & (`sd_dif') \\ " _newline
	
	}
	
	file write temp_file " \hline " _newline
	file write temp_file2 " \hline " _newline
	
	count if Treatment_Ind == 0
	local count_control = r(N)
	count if Treatment_Ind == 1
	local count_treat = r(N)
	file write temp_file  "N: & `count_control' & `count_treat' & & \\" _newline
	file write temp_file2  "N: & `count_control' & `count_treat' &\\" _newline
	
	file close temp_file
	file close temp_file2
	

	* Other balance stuff:
	* Whether equal in probability of having zero leakage
	gen zero_baseline_leakage = Tot_emiss_Leak1==0
	reg zero_baseline_leakage Treatment_Ind, cluster(Operator_1)
	test Treatment_Ind==0
	local is_zero_p_test = string(round(r(p),0.001), "%5.3fc")
	write_single_num `is_zero_p_test', file("Paper/Figures/single_numbers/baseline_leakage_is_zero_p_test.tex")

	* Whether equal in probability of having zero total emissions
	gen zero_baseline_emissions = Tot_emiss_Leak1==0 & Tot_emiss_Vent1==0
	reg zero_baseline_emissions Treatment_Ind, cluster(Operator_1) 
	test Treatment_Ind==0
	local is_zero_p_test = string(round(r(p),0.001), "%5.3fc")
	write_single_num `is_zero_p_test', file("Paper/Figures/single_numbers/baseline_emissions_is_zero_p_test.tex")

	


* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 
* BALANCE CONDITIONAL ON ZERO BASELINE LEAKAGE:
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 

	* Conditional on zero baseline leakage, do treatment and control sites have same probability of zero total emissions?
	reg zero_baseline_emissions Treatment_Ind if zero_baseline_leakage==1, cluster(Operator_1)
	reg zero_baseline_emissions Treatment_Ind if zero_baseline_leakage==1 & IncludedinAnalysis == "Yes", cluster(Operator_1)
	// No significant different 
	
	drop zero_baseline_leakage
	drop zero_baseline_emissions

	* Creates two versions of table: One with and without the compare all three treatment groups:
	capture erase "Paper/Figures/balance_table_zero_leakage.tex"
	
	file open temp_file using "Paper/Figures/balance_table_zero_leakage.tex", write
	
	file write temp_file  " & Control & Treatment & Difference \\ \hline " _newline
	
	foreach var of varlist /*GasSite*/ Site_start_year baseline_venting baseline_Gas baseline_Oil {
		
		* Gets variable label
		local lab: variable label `var'  //  <- save variable label in local `lab' 
		
		* Gets mean
		mean `var' if Tot_emiss_Leak1==0, over(Treatment_Ind) cluster(Operator_1)
		
		local mean_control : di %03.2f `=e(b)[1,1]'
		local sd_control : di %03.2f `=e(sd)[1,1]'
		local mean_treat : di %03.2f `=e(b)[1,2]'
		local sd_treat : di %03.2f `=e(sd)[1,2]'
		
		* Gets difference:
		reg `var' Treatment_Ind if Tot_emiss_Leak1==0, cluster(Operator_1)
		
		local mean_dif : di %03.2f `=e(b)[1,1]'
		local sd_dif : di %03.2f `= e(V)[1,1]^0.5'
		
		if "`var'"=="baseline_venting" {
			qui test Treatment_Ind==0
			local test_vent_zero_base_leakage = r(p)
		}
		
		
		
		* Gets p value of test of all being equal:
		reg `var' Annual Biannual Triannual, cluster(Operator_1)
		test Annual==Biannual==Triannual==0
		local all_equal_pvalue : di %04.3f `=r(p)'
			
		di "`all_equal_pvalue'"
		

		* Writes output
		file write temp_file "  `lab' & `mean_control' & `mean_treat' & `mean_dif' \\ " _newline
		file write temp_file " & (`sd_control') & (`sd_treat') & (`sd_dif')\\ " _newline
	
	}
	
	file write temp_file " \hline " _newline
	
	count if Treatment_Ind == 0 & Tot_emiss_Leak1==0
	local count_control = r(N)
	count if Treatment_Ind == 1 & Tot_emiss_Leak1==0
	local count_treat = r(N)
	file write temp_file  "N: & `count_control' & `count_treat' & \\" _newline
	
	file close temp_file
	
	
	* Outputs the p-value for the baseline venting equality test
	di `test_vent_zero_base_leakage'
	local test_vent_zero_base_leakage = string(round(`test_vent_zero_base_leakage', .001), "%04.3f")
	write_single_string "`test_vent_zero_base_leakage'", file("Paper/Figures/single_numbers/test_vent_zero_base_leakage.tex")
	
	* Drops these new variables:
	drop baseline_Gas baseline_leakage baseline_Gas baseline_venting baseline_Oil 
	
	
	
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 
* REGRESSION: REPLICATING WANG et al.
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 

	reg Emissionskgd_5 Treatment_Ind, cluster(Operator_1)
	reg Emissionskgd_5 Treatment_Ind Emissionskgd_1, cluster(Operator_1)
	reg change_Emissions Treatment_Ind, cluster(Operator_1)
		test Treatment_Ind + _cons == 0

	reg Tot_emiss_Leak5 Treatment_Ind, cluster(Operator_1)
	reg Tot_emiss_Leak5 Treatment_Ind Tot_emiss_Leak1, cluster(Operator_1)
	reg change_Leak Treatment_Ind, cluster(Operator_1)
	
	reg Tot_emiss_Vent5 Treatment_Ind, cluster(Operator_1)
	reg Tot_emiss_Vent5 Treatment_Ind Tot_emiss_Vent1, cluster(Operator_1)
	reg change_Vent Treatment_Ind, cluster(Operator_1)
	


*******************************************************************************
*
*     Operator exploration and testing for spillovers onto the control site
* 
********************************************************************************

count if Operator_1 == Operator_5
count if Operator_1 != Operator_5
tab Operator_1
di r(r) // 18

tab Operator_1 Treatment_Ind , row
egen tag_Operator_1 = tag(Operator_1)
egen total_operator_sites = total(1), by(Operator_1)
gen log_total_Sites_by_Op_1 = log(total_operator_sites)


* Means treatment:
egen mean_Treat_by_Op = mean(Treatment_Ind), by(Operator_1)

* Total treatment sites:
egen total_Treat_sites = total(Treatment_Ind), by(Operator_1)

* Whether has above-median baseline leakage
qui sum Tot_emiss_Leak1, detail
gen above_median_Leak1 = Tot_emiss_Leak1 >= r(p50)

* Total treatment sites that are above median:
egen total_above_med_Leak_treat_sites = total(Treatment_Ind * above_median_Leak1), by(Operator_1)

* Fraction of treatment sites that have above-median leakage:
gen mean_above_med_Leak_treat_sites = total_above_med_Leak_treat_sites / total_Treat_sites

codebook mean_above_med_Leak_treat_sites

* Label variables:
label var log_total_Sites_by_Op_1 "log(total sites)"
label var mean_Treat_by_Op        "fraction of sites treated"
label var mean_above_med_Leak_treat_sites "frac treatment sites with above-median leakage"                                    
label var Tot_emiss_Leak1 "baseline leakage"


* Regressions testing for SUTVA -- control sites with zero baseline leakage: 


reg Tot_emiss_Leak5 log_total_Sites_by_Op_1	                                                      if Tot_emiss_Leak1==0 & Treatment_Ind == 0, cluster(Operator_1)
	estadd local zero_baseline_leakage "Y"
	estimates store reg_1 
reg Tot_emiss_Leak5 log_total_Sites_by_Op_1	mean_Treat_by_Op                                      if Tot_emiss_Leak1==0 & Treatment_Ind == 0, cluster(Operator_1)
	estadd local zero_baseline_leakage "Y"
	estimates store reg_2 
reg Tot_emiss_Leak5 log_total_Sites_by_Op_1	mean_Treat_by_Op mean_above_med_Leak_treat_sites      if Tot_emiss_Leak1==0 & Treatment_Ind == 0, cluster(Operator_1)
	estadd local zero_baseline_leakage "Y"
	estimates store reg_3 


* Regressions testing for SUTVA -- control sites with above-median baseline leakage: 

qui sum Tot_emiss_Leak1, detail
local median_baseline_leak = r(p50)

reg Tot_emiss_Leak5 log_total_Sites_by_Op_1	Tot_emiss_Leak1                                                      if Tot_emiss_Leak1 >= `median_baseline_leak' & Treatment_Ind == 0, cluster(Operator_1)
	estadd local above_median_baseline_leakage "Y"
	estimates store reg_4 
reg Tot_emiss_Leak5 log_total_Sites_by_Op_1	mean_Treat_by_Op 	Tot_emiss_Leak1                                      if Tot_emiss_Leak1 >= `median_baseline_leak' & Treatment_Ind == 0, cluster(Operator_1)
	estadd local above_median_baseline_leakage "Y"
	estimates store reg_5 
reg Tot_emiss_Leak5 log_total_Sites_by_Op_1	mean_Treat_by_Op mean_above_med_Leak_treat_sites 	Tot_emiss_Leak1     if Tot_emiss_Leak1 >= `median_baseline_leak' & Treatment_Ind == 0, cluster(Operator_1)
	estadd local above_median_baseline_leakage "Y"
	estimates store reg_6 


	#delimit ;
	esttab reg_2 reg_3 reg_5 reg_6 
		using "Paper/Figures/spillover_test_control_sites_OLS.tex",
		b(%8.2f) se(%8.2f) nogaps mtitles("leakage" "leakage" "leakage" "leakage")
		replace se compress label numbers nonotes nostar
		scalars("zero_baseline_leakage Zero baseline leakage" 
		     "above_median_baseline_leakage Above median baseline leakage" 
			 "r2 \hline R\textsuperscript{2}") obslast ;
	#delimit cr

	
	


***********************************************

* Plot of operator site count and fraction treated
duplicates report mean_Treat_by_Op total_operator_sites if tag_Operator_1
scatter  mean_Treat_by_Op total_operator_sites if tag_Operator_1, ///
	jitter(1) jitterseed(0) msymbol(X) mcolor(black) msize(large) xtitle("Number of sites") ytitle("Fraction of sites treated") ///
	graphregion(color(white)) bgcolor(white) 
graph export "Paper/Figures/fraction_treated_by_operator.pdf", replace


* outputting operator site count statistics 
	qui sum  total_operator_sites if tag_Operator_1 
	local max_site_count = r(max)
	local min_site_count = r(min)

	write_single_num `max_site_count', file("Paper/Figures/single_numbers/highest_site_count.tex")
	write_single_num `min_site_count', file("Paper/Figures/single_numbers/lowest_site_count.tex")
	
	
	qui count if tag_Operator_1 == 1 & total_operator_sites == `max_site_count'
	local count_max = r(N)
	write_single_num `count_max', file("Paper/Figures/single_numbers/operator_count_most_sites.tex")

	qui count if tag_Operator_1 == 1 & total_operator_sites == `min_site_count'
	local count_min = r(N)
	write_single_num `count_min', file("Paper/Figures/single_numbers/operator_count_fewest_sites.tex")


	
	
	
***************************************************************************************
*
* REGRESSIONS CONDITIONAL ON ZERO BASELINE LEAKAGE -- LINEAR
*
***************************************************************************************

	
	* Top table (the As) has titles and column numbers
	* Bottom table (the Bs) have  mean of control group, mean of control group, and N
	* Both tables have average_treat_effect and R2

	* additional controls are:
	local added_controls log1_Tot_emiss_Vent1 log1_ProdGas_1 log1_ProdOil_1 /* GasSite */
	local added_controls2 pos_Tot_emiss_Vent1 log_Tot_emiss_Vent1 Treat_pos_Tot_emiss_Vent1 Treat_log_Tot_emiss_Vent1
	local added_controls3 log1_ProdGas_1 log1_ProdOil_1
	local added_controls4 pos_Tot_emiss_Vent1 log_Tot_emiss_Vent1 log1_ProdGas_1 log1_ProdOil_1 /* GasSite */
	local added_controls5 log1_Tot_emiss_Vent1 Treat_log1_Tot_emiss_Vent1
	
	* Leakage:
	reg Tot_emiss_Leak5 Treatment_Ind if Tot_emiss_Leak1==0, cluster(Operator_1)
		test Treatment_Ind
		estadd scalar p_value r(p)
		estimates sto reg_1_zOLS
		local approx_treat_effect = round(_b[Treatment_Ind], .1)
		di `approx_treat_effect'
		write_single_num `approx_treat_effect', file("Paper/Figures/single_numbers/treat_effect_leak_no_controls_zero_baseline_leakage_OLS.tex")
		
		
	reg Tot_emiss_Leak5 Treatment_Ind `added_controls' if Tot_emiss_Leak1==0, cluster(Operator_1)
		test Treatment_Ind
		estadd scalar p_value r(p)
		estadd local has_controls "Y"
		estimates sto reg_2_zOLS
		local approx_treat_effect = round(_b[Treatment_Ind], .1)
		write_single_num `approx_treat_effect', file("Paper/Figures/single_numbers/treat_effect_leak_w_controls_zero_baseline_leakage_OLS.tex")
		test Treatment_Ind

		
	* Total emissions:
	reg Emissionskgd_5 Treatment_Ind                 if Tot_emiss_Leak1==0, cluster(Operator_1)
		test Treatment_Ind
		estadd scalar p_value r(p)
		estimates sto reg_3_zOLS
		local approx_treat_effect = round(_b[Treatment_Ind], .1)
		write_single_num `approx_treat_effect', file("Paper/Figures/single_numbers/treat_effect_emiss_no_controls_zero_baseline_leakage_OLS.tex")
		test Treatment_Ind
	
	reg Emissionskgd_5 Treatment_Ind `added_controls' if Tot_emiss_Leak1==0, cluster(Operator_1)
		test Treatment_Ind
		estadd scalar p_value r(p)
		estadd local has_controls "Y"
		estimates sto reg_4_zOLS
		local approx_treat_effect = round(_b[Treatment_Ind], .1)
		write_single_num `approx_treat_effect', file("Paper/Figures/single_numbers/treat_effect_emiss_w_controls_zero_baseline_leakage_OLS.tex")
		test Treatment_Ind
		
	**** Outputs regression results:
	#delimit ;
	esttab reg_1_zOLS reg_2_zOLS reg_3_zOLS reg_4_zOLS
		using "Paper/Figures/reg_zero_baseline_leakage_OLS.tex",
		order(Treatment_Ind _cons)
		keep(Treatment_Ind _cons )
		b(%8.2f) se(%8.2f) nogaps mtitles("leakage" "leakage" "emissions" "emissions")
		replace se compress label numbers nonotes nostar  /* star(* .1 ** .05 *** .01) */
		scalars("p_value Treatment effect p-value" "has_controls Controls" "r2 \hline R\textsuperscript{2}") obslast;
	#delimit cr

	* Same thing except a fragment version with no header:
	#delimit ;
	esttab reg_1_zOLS reg_2_zOLS reg_3_zOLS reg_4_zOLS
		using "Paper/Figures/reg_zero_baseline_leakage_OLS_fragment.tex",
		order(Treatment_Ind _cons)
		keep(Treatment_Ind _cons )
		b(%8.2f) se(%8.2f) nogaps mtitles("leakage" "leakage" "emissions" "emissions") fragment
		replace se compress label numbers nonotes nostar  /* star(* .1 ** .05 *** .01) */
		scalars("has_controls Controls" "r2 \hline R\textsuperscript{2}") obslast;
	
	* Same thing except all of the covariates included:
	#delimit ;
	esttab reg_1_zOLS reg_2_zOLS reg_3_zOLS reg_4_zOLS
		using "Paper/Figures/reg_zero_baseline_leakage_OLS_detail.tex",
		b(%8.2f) se(%8.2f) nogaps mtitles("leakage" "leakage" "emissions" "emissions")
		replace se compress label numbers nonotes nostar  /* star(* .1 ** .05 *** .01) */
		scalars("has_controls Controls" "r2 \hline R\textsuperscript{2}") obslast;
	#delimit cr
	
	
	
	*********
	
	* Controlling for interaction between Treatment and baseline venting:
	reg Tot_emiss_Leak5 Treatment_Ind `added_controls5' if Tot_emiss_Leak1 == 0, cluster(Operator_1)
		estimates sto reg_1
	reg Emissionskgd_5 Treatment_Ind `added_controls5' if Tot_emiss_Leak1 == 0, cluster(Operator_1)
		estimates sto reg_2
		
	#delimit ;
	esttab reg_1 reg_2 using "Paper/Figures/reg_zero_baseline_leakage_OLS_Treat_x_Vent.tex",
		b(%8.2f) se(%8.2f) nogaps mtitles("leakage" "emissions")
		replace se compress label numbers nonotes nostar  /* star(* .1 ** .05 *** .01) */
		scalars("r2 \hline R\textsuperscript{2}") obslast;
	#delimit cr

	*********
		
		

	* Regressions on treatment intensity -- in appendix.

	reg Tot_emiss_Leak5 Treatment_Ind Treatment_Intensity if Tot_emiss_Leak1==0, cluster(Operator_1)
		estimates sto reg_1

	reg Tot_emiss_Leak5 Treatment_Ind Treatment_Intensity `added_controls' if Tot_emiss_Leak1==0, cluster(Operator_1)
		estadd local has_controls "Y"
		estimates sto reg_2
	
	* Total emissions:
	reg Emissionskgd_5 Treatment_Ind Treatment_Intensity                  if Tot_emiss_Leak1==0, cluster(Operator_1)
		estimates sto reg_3
	
	reg Emissionskgd_5 Treatment_Ind Treatment_Intensity `added_controls' if Tot_emiss_Leak1==0, cluster(Operator_1)
		estadd local has_controls "Y"
		estimates sto reg_4

		
	#delimit ;
	esttab reg_1 reg_2 reg_3 reg_4
		using "Paper/Figures/reg_zero_baseline_leakage_treatment_intensity_OLS.tex",
		order(Treatment_Ind Treatment_Intensity _cons)
		keep(Treatment_Ind Treatment_Intensity _cons )
		b(%8.2f) se(%8.2f) nogaps mtitles("leakage" "leakage" "emissions" "emissions")
		replace se compress label numbers nonotes nostar  /* star(* .1 ** .05 *** .01) */
		scalars("has_controls Controls" "r2 \hline R\textsuperscript{2}") obslast;
	#delimit cr
	

	
	
	
	* Count of leaks:

	* Leakage:
	reg Tot_comp_Leak5 Treatment_Ind if Tot_emiss_Leak1==0, cluster(Operator_1)
		estimates sto reg_1_countOLS
	reg Tot_comp_Leak5 Treatment_Ind `added_controls' if Tot_emiss_Leak1==0, cluster(Operator_1)
		estadd local has_controls "Y"
		estimates sto reg_2_countOLS
		
	poisson Tot_comp_Leak5 Treatment_Ind if Tot_emiss_Leak1==0, cluster(Operator_1)
		estimates sto reg_1_countPOISSON
	poisson Tot_comp_Leak5 Treatment_Ind `added_controls' if Tot_emiss_Leak1==0, cluster(Operator_1)
		estadd local has_controls "Y"
		estimates sto reg_2_countPOISSON

	#delimit ;
	esttab reg_1_countOLS reg_2_countOLS reg_1_countPOISSON reg_2_countPOISSON
		using "Paper/Figures/reg_zero_baseline_leakage_COUNT.tex",
		order(Treatment_Ind _cons)
		keep(Treatment_Ind _cons )
		b(%8.2f) se(%8.2f) nogaps mtitles("OLS" "OLS" "Poisson" "Poisson")
		replace se compress label numbers nonotes nostar nogaps eqlabels("", nomerge)  /* star(* .1 ** .05 *** .01) */
		scalars("has_controls Controls" "r2 \hline R\textsuperscript{2}" "ll Log likelihood") obslast;
	#delimit cr
		

	*********************************************************
	* Heterogeneous treatment effects by production value and well age
	
	* Catogorizing wells by whether above median production value and below median well age:
	sum prodvalue_1 if Tot_emiss_Leak1 == 0 & !missing(prodvalue_1), detail
	gen gt_p50_value_1 = prodvalue_1 >= r(p50) if Tot_emiss_Leak1 == 0 & !missing(prodvalue_1)
	gen Treatment_gt_p50_value_1 = Treatment_Ind *  gt_p50_value_1 if Tot_emiss_Leak1 == 0 & !missing(prodvalue_1)
	label var gt_p50_value_1 "Higher revenue"
	label var Treatment_gt_p50_value_1 "Treatment x higher revenue"
	
	sum Site_start_year if Tot_emiss_Leak1 == 0, detail
	gen later_site = Site_start_year >= r(p50) if Tot_emiss_Leak1 == 0 
	gen Treatment_later_site = Treatment_Ind * later_site if Tot_emiss_Leak1 == 0
	
	label var later_site "Younger site"
	label var Treatment_later_site "Treatment x younger site"


		
		
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 
* FOR OUT OF SAMPLE CALCULATIONS WITHIN BOOTSTRAPPING: 
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 

* Creates a .dta file with info used for calcluating treatment effects

	* preparation:
	preserve

				
	* Sets an "out-of-sample" indicator variable:
	* While this is the full sample and seems repetiative, it will be useful to have when we do bootstrapping
	*  and want to get confidence intervals on the distribution of treatment effects over the full original sample
	gen oos = 1
	gen oos_full_sample = 1
	
	*** Then adds additional observations for more cases			
	
	* Gets number of observations:
	local new_N = _N+4
	local old_N_plus_1 = _N+1
	
	* sets new number of observations and modifies markers:
	set obs `new_N'
	replace oos = 1 in `old_N_plus_1'/`new_N'
	replace oos_full_sample = 0 in `old_N_plus_1'/`new_N'
	
	* Adds data for the new_N minus old_N new observations:
	
	* Treatment and interaction with treatment will be zero
	foreach var of varlist Treatment_Ind ///
		Treat_Tot_emiss_Leak1 Treat_Tot_emiss_Vent1 ///
		Treat_pos_Tot_emiss_Leak1 Treat_pos_Tot_emiss_Vent1 ///
		Treat_log_Tot_emiss_Leak1 Treat_log_Tot_emiss_Vent1 ///
		Treat_log1_Tot_emiss_Leak1 Treat_log1_Tot_emiss_Vent1 ///
		Treat_isnh_Tot_emiss_Leak1 Treat_isnh_Tot_emiss_Vent1 ///
		{
			replace `var' = 0 in `old_N_plus_1'/`new_N' 
		}
	
	* old_N + 1 -- has zero
	gen oos_zero_baseline = _n==`old_N_plus_1'
	replace Tot_emiss_Leak1 = 0 if oos_zero_baseline
	replace Tot_emiss_Vent1 = 0 if oos_zero_baseline
	
	* old_N + 2 -- case where both leak and vent are at 95th percentile
	gen oos_p95_baseline = _n ==`old_N_plus_1' + 1
	qui sum Tot_emiss_Leak1, detail
	replace Tot_emiss_Leak1 = r(p95) if oos_p95_baseline
	qui sum Tot_emiss_Vent1, detail
	replace Tot_emiss_Vent1 = r(p95) if oos_p95_baseline

	* old_N + 3 -- case where leak is at 50 and vent is at zero:
	gen oos_50leak = _n == `old_N_plus_1' + 2
	qui replace Tot_emiss_Leak1 = 50 if oos_50leak 
	qui replace Tot_emiss_Vent1 = 0 if oos_50leak
	
	* old_N + 4 -- case where vent is at 50 and lzeak is at zero:
	gen oos_50vent = _n == `old_N_plus_1' + 3
	qui replace Tot_emiss_Leak1 = 0 if oos_50vent 
	qui replace Tot_emiss_Vent1 = 50 if oos_50vent
	
	* Fills in other variables for these new observations:
	foreach var of varlist Tot_emiss_Leak1 Tot_emiss_Vent1 {
		* Whether positive:
		replace pos_`var' = `var'>0 if ~missing(`var')
		
		* True log
		replace log_`var' = log(`var')

		* "log" of -- where is defined as zero if variable is zero
		replace log_`var' = log((`var'==0) + (`var'>0)*`var') if ~missing(`var')
		
		* log + 1
		replace log1_`var' = log(1 + `var') if ~missing(`var')
		
		* inverse hyperbolic sine
		replace isnh_`var' = log(`var' + (`var'^2 + 1)^0.5) if ~missing(`var')
	}
		
	* Saves file:
	save "Data/Intermediate/oos.dta", replace
	
	* goes back to original data:
	restore
	
		
	
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 
* FULL SPEC USING TWOPM AND OUT OF SAMPLE PREDICTION
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 



	* Creates more observations and create covariates for them:
	cap program drop add_obs
	program define add_obs
	
		syntax [, twopm_if(string)]
		
		* Gets current N
		local old_N = _N
	
		* appends on new data:
		append using "Data/Intermediate/oos.dta"
		
		* sets all oos_variables to be zero if not in the appended set
		foreach var of varlist oos* {
			replace `var' = 0 if _n<=`old_N'
		}
		
		* drops all added cases that don't fit the twopm_if statement:
		if "`twopm_if'" != "" {
			tempvar temp_if
			qui gen `temp_if' = 1 `twopm_if'
			qui replace `temp_if' = 0 if missing(`temp_if')
			
			keep if _n<=`old_N' | `temp_if'==1
		}
		
	end

	* Program to drop those added observations:
	cap program drop remove_obs
	program define remove_obs

		drop if oos==1
		drop oos*
		
	end
	
	
	* Program to get treatment effect:
	cap program drop treat_effect
	program define treat_effect
		syntax newvarlist(min=3)
		* These variables should treatment effect, expectation conditional on control, and expectatoin conditional on treatment:
		tempvar pred_control pred_treat
		* list of treatment interaction variables. Note: they all need to
		* be of the name Treat_ + <original variable name>
		#delimit ;
		local treat_interact_vars 
					Treat_Tot_emiss_Leak1
					Treat_Tot_emiss_Vent1
					Treat_ProdGas_1
					Treat_ProdOil_1
					Treat_pos_Tot_emiss_Leak1
					Treat_pos_Tot_emiss_Vent1
					Treat_pos_ProdGas_1
					Treat_pos_ProdOil_1
					Treat_log_Tot_emiss_Leak1
					Treat_log_Tot_emiss_Vent1
					Treat_log_ProdGas_1
					Treat_log_ProdOil_1
					Treat_log1_Tot_emiss_Leak1
					Treat_log1_Tot_emiss_Vent1
					Treat_log1_ProdGas_1
					Treat_log1_ProdOil_1
					Treat_isnh_Tot_emiss_Leak1
					Treat_isnh_Tot_emiss_Vent1
					Treat_isnh_ProdGas_1
					Treat_isnh_ProdOil_1
					Treat_GasSite
				;
		#delimit cr
		* Creates original version variables that show initial value:
		tempvar orig_Treatment_Ind
		qui gen `orig_Treatment_Ind' = Treatment_Ind
		foreach var of varlist `treat_interact_vars' {
			tempvar orig_`var'
			qui gen `orig_`var'' = `var'
		}
		* Then looks at control scenario and predicts outcomes :
		qui replace Treatment_Ind = 0
		foreach var of varlist `treat_interact_vars' {
			qui replace `var' = 0
		}
		qui predict `pred_control'
		* Then looks at treatment scenario and predicts outcomes  :
		qui replace Treatment_Ind = 1
		foreach var of varlist `treat_interact_vars' {
			local var_without_treat_interaction = subinstr("`var'", "Treat_", "", 1)
			qui replace `var' = `var_without_treat_interaction'
		}
		qui predict `pred_treat'
		* Then replaces all of the treatment variables with their original values:
		qui replace Treatment_Ind = `orig_Treatment_Ind'
		foreach var of varlist `treat_interact_vars' {
			qui replace `var' = `orig_`var''
		}
		
		* Outputs the variables:
		local treat_effect_var : word 1 of `varlist'
		gen `treat_effect_var' = `pred_treat' - `pred_control'
		local E_cond_control : word 2 of `varlist'
		gen `E_cond_control' = `pred_control'
		local E_cond_treat : word 3 of `varlist'
		gen `E_cond_treat' = `pred_treat'
		
	end

	
	* Program for doing two_pm and outputting covariate-specific treatment effects
	cap program drop twopm_treat_effects
	program define twopm_treat_effects, eclass
	
		syntax [newvarlist(max=3)] , twopm_cmd(string) [ twopm_if(string) ]
		* newvarname1 is the treatment effect variable
		* newvarname2 is the expectation conditional on control and covariates
		* newvarname3 is the expectation conditional on treatment and covariates
		* twopm_cmd should be the full command for running twopm
		* if there is an "if" statement, it should be included in the command in twopm_cmd and then also in twopm_if, e.g.:
		*     twopm_cmd(twopm y x if z==0, first(..) second(..)) twopm_if(if z==0) 
		
		* Runs the twopm estimation:
		`twopm_cmd'
			
		* Adds in observations for out of sample prediction
		quietly add_obs, twopm_if(`twopm_if') 
		
		* Predict treatment effects for everything including those added by add_obs:
		tempvar treat_effect E_control E_treat
		treat_effect `treat_effect' `E_control' `E_treat'

		* Outputs treatment effect for our out-of-sample cases:
		* Cases where leakage and venting at baseline are both zero:
		qui sum `treat_effect' if oos_zero_baseline, meanonly
		estadd scalar teffect_at_zero = r(mean)
		* Cases where leakage and venting at baseline are at 95th percentile:
		qui sum `treat_effect' if oos_p95_baseline, meanonly
		estadd scalar teffect_at_p95 = r(mean)
		* Cases where has 50 kgd of leakage  but not venting
		qui sum `treat_effect' if oos_50leak, meanonly
		estadd scalar teffect_50leak = r(mean)
		* Cases where has 50 kgd of venting but no leakage:
		qui sum `treat_effect' if oos_50vent, meanonly
		estadd scalar teffect_50vent = r(mean)
		
		if ~missing("`twopm_if'") {
			qui sum `treat_effect' `twopm_if' & oos_zero_baseline , meanonly
			estadd scalar if_teffect_at_zero = r(mean)
			* Cases where leakage and venting at baseline are at 95th percentile:
			qui sum `treat_effect' `twopm_if' & oos_p95_baseline, meanonly
			estadd scalar if_teffect_at_p95 = r(mean)
			* Cases where has 50 kgd of leakage  but not venting
			qui sum `treat_effect' `twopm_if' & oos_50leak, meanonly
			estadd scalar if_teffect_50leak = r(mean)
			* Cases where has 50 kgd of venting but no leakage:
			qui sum `treat_effect' `twopm_if' & oos_50vent, meanonly
			estadd scalar if_teffect_50vent = r(mean)
		}
		
		* Distribution of effects from the full sample:
		qui sum `treat_effect' if oos_full_sample, detail
		local rp1 = r(p1)
		local rp99 = r(p99)
		estadd scalar teffect_dist_mean = r(mean)
		estadd scalar teffect_dist_p50 = r(p50)
		estadd scalar teffect_dist_p5  = r(p5)
		estadd scalar teffect_dist_p95 = r(p95)
		
		* Effect by quintile of baseline emisisons
		qui sum `treat_effect' if oos_full_sample & Tot_emiss_Leak1==0, meanonly
		estadd scalar teffect_0Leak = r(mean)
		qui sum `treat_effect' if oos_full_sample & Emissionskgd_1==0, meanonly
		estadd scalar teffect_0Emiss = r(mean)
		qui centile Emissionskgd_1, centile(20, 30, 40, 50, 60, 70, 80, 90, 100)
		forvalues i=1/9 {
			local c_`i' = r(c_`i')
		}
		forvalues i=1/8 {
			local i_plus_1 = `i'+1
			qui sum `treat_effect' if oos_full_sample & Emissionskgd_1 > `c_`i'' & Emissionskgd_1 <= `c_`i_plus_1''
			estadd scalar teffect_C`i_plus_1'Emiss = r(mean)
		}
		
		
		* mean conditional on excluding outliers:
		qui sum `treat_effect' if oos_full_sample & `treat_effect'>`rp1' & `treat_effect'<`rp99', meanonly
		estadd scalar teffect_av_excl_outliers = r(mean)
		
		* mean for specific values:
		qui sum `treat_effect' if oos_full_sample & Tot_emiss_Leak1==0, meanonly
		estadd scalar teffect_dist_0leak = r(mean)
		
		* Same as above but using the if statement:
		* Distribution of the effects from the efull sample conditional on the if statement:
		* Distribution of effects from the full sample:
		
		if ~missing("`twopm_if'") {
			qui sum `treat_effect' `twopm_if' & oos_full_sample, detail
			local rp1 = r(p1)
			local rp99 = r(p99)
			estadd scalar if_teffect_dist_mean = r(mean)
			estadd scalar if_teffect_dist_p50 = r(p50)
			estadd scalar if_teffect_dist_p5  = r(p5)
			estadd scalar if_teffect_dist_p95 = r(p95)
			
			* mean conditional on excluding outliers:
			qui sum `treat_effect' `twopm_if' & oos_full_sample & `treat_effect'>`rp1' & `treat_effect'<`rp99', meanonly
			estadd scalar if_teffect_av_excl_outliers = r(mean)
		}
		
		* Removes observations that are added on:
		quietly remove_obs
		
		* Outputs a treatment effect variable if a variable name (`1') is specified
		* Only outputs this if within sample (e.g., within the twopm_if condition)
		if "`varlist'" != "" {
			local newvarcount : word count `varlist'
			if `newvarcount' >= 1 {
				local newvarname1 : word 1 of `varlist'
				gen `newvarname1' = `treat_effect'
			}
			if `newvarcount' >= 2 {
				local newvarname2 : word 2 of `varlist'
				gen `newvarname2' = `E_control'
			}
			if `newvarcount' >= 3 {
				local newvarname3 : word 3 of `varlist'
				gen `newvarname3' = `E_treat'
			}
		}
		
	end
	
	
	* Program for returning bootstrap-estimation results into the main estimate stored:
	* The argument ('name') is the name of the previous stored estimation results
	* bs_e_list is the list of the additional things that were bootstrapped over
	cap program drop return_boot_results
	program define return_boot_results
		
		* e(N_misreps) =  0
		
		syntax anything, bs_e_list(string)
		* Run estat bootstrap to get confidence intervals:
		estat bootstrap
		* Gets number of misreps:
		local N_misreps = e(N_misreps)
		* Gets the additional results as a matrices:
		matrix elist_ci_bc = e(ci_bc)
		matrix elist_b = e(b)
		* Adds column names
		matrix colnames elist_b = `bs_e_list'
		matrix colnames elist_ci_bc = `bs_e_list' 
		* restores the estimate results
		estimates restore `anything'
		* Outputs the misreps:
		estadd local N_misreps `N_misreps'
		* outputs the matrices into the estimate using estadd:
		estadd matrix elist_ci_bc = elist_ci_bc
		estadd matrix elist_b = elist_b
		* Gets the confidence lower bounds and upper bounds as scalars;
		* Gets the 95% confidence interval as a local string:
		local i=1
		foreach e_item in `bs_e_list' {
			local e_item_stripped = subinstr("`e_item'","e(","",1)
			local e_item_stripped = subinstr("`e_item_stripped'",")","",1)
			local ll = e(elist_ci_bc)[1,`i']
			local ul = e(elist_ci_bc)[2,`i']
			estadd scalar ll_`e_item_stripped' = `ll'
			estadd scalar ul_`e_item_stripped' = `ul'
			local ll_round = round(`ll',0.1)
			local ul_round = round(`ul',0.1)
			local bsci = " [" + string(`ll_round', "%08.1fc") + "," + string(`ul_round', "%08.1fc") + "] "
			estadd local bsci_`e_item_stripped' `bsci'
			local i = `i' + 1
		}
		
		* Resaves the estimates
		est sto `anything'
		
	end
	


	
	
	
	
	* Program to create graphics:
	cap program drop graph_treat_control
	program define graph_treat_control
		syntax varlist(min=3 max=3) [, saving(string) xtitle(passthru) ytitle(passthru)]
		* first variable is the prediction for control; second is prediction for treatment; 
		* third is the baseline variable (e.g., log1_Tot_emiss_Leak1)
		
		local control_var : word 1 of `varlist'
		local treat_var : word 2 of `varlist'
		local baseline_var : word 3 of `varlist'
		
		tempvar log1_control log1_treat orig_n
		qui gen `log1_control' = log(1 + `control_var')
		qui gen `log1_treat'   = log(1 + `treat_var')
		qui gen `orig_n' = _n
		
		qui sort `baseline_var'
		twoway lowess `log1_control' `baseline_var', lpattern(solid) || ///
		       lowess `log1_treat'   `baseline_var', lpattern(dash) ///
			   legend(label(1 "control") label(2 "treatment")) ///
			   graphregion(color(white)) bgcolor(white) ///
			   `xtitle' `ytitle'
		
		qui sort `orig_n'
	end 
	

	

* * * * * * * * * * * * * * * * * * *	
* POWER CALCULATIONS -- OUTPUTTING NUMBERS SO THEY CAN BE USED BY gelman_carlin.R	
* * * * * * * * * * * * * * * * * * *		
	
	* outputs numbers to be used in power and Gelman-Carlin calculations.
	gen muT = .
	gen muC = .
	gen sdT = .
	gen sdC = .
	gen nT = .
	gen nC = .
	local i = 1
	foreach var of varlist Tot_emiss_Leak5 Emissionskgd_5 {
		summarize `var' if Treatment_Ind == 1 & Tot_emiss_Leak1==0
		replace muT = r(mean) in `i'
		replace sdT = r(sd) in `i'
		replace nT = r(N) in `i'
		summarize `var' if Treatment_Ind == 0 & Tot_emiss_Leak1==0
		replace muC = r(mean) in `i'
		replace sdC = r(sd) in `i'
		replace nC = r(N) in `i'
		local i = `i'+1
	}

	export delimited muT muC sdT sdC nT nC using "Data/Intermediate/power_gelman_carlin_inputs.csv" in 1/2, replace 

	drop muT muC sdT sdC nT nC 

	
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 
* RUNNING TWOPM_TREAT_EFFECTS
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 

	* Setting things up:
	
	* List of control variables no interactions conditional on zero baseline leakage:

	
	local added_controls_L0 log1_Tot_emiss_Vent1 log1_ProdGas_1 log1_ProdOil_1 /* GasSite */
	* List of control variables including interactions conditional on zero baseline leakage:
	local added_controls_Int_L0 `added_controls_L0' Treat_log1_Tot_emiss_Vent1 /*Treat_log1_ProdGas_1 Treat_log1_ProdOil_1*/ /* Treat_GasSite */
	* List of control variables no interactions unconditional on baseline leakage (includes controls for baseline leakage)
	local added_controls_LA `added_controls_L0' log1_Tot_emiss_Leak1
	* List of control variables including interactions unconditional on baseline leakage (includes controls for baseline leakage)
	local added_controls_Int_LA `added_controls_Int_L0' log1_Tot_emiss_Leak1 Treat_log1_Tot_emiss_Leak1

	di "`added_controls_L0'"
	di "`added_controls_Int_L0'"
	di "`added_controls_LA'"
	di "`added_controls_Int_LA'"
	
	* List of if statements:
	local if_L0 if Tot_emiss_Leak1 == 0
	local if_L0_nonmiss if Tot_emiss_Leak1 == 0 & ~missing(ProdGas_1)
	
	count `if_L0'
	reg Tot_emiss_Leak5 Treatment_Ind `if_L0'
	count `if_L0' &  ~missing(Operator_1)
	codebook Operator_1
	
	
	* List of _e for bootstrapping:
	* List for conditional on baseline leakage = 0 
	local bs_e_list_L0 e(teffect_0Leak) 
	* List for unconditoinal
	local bs_e_list_LA e(teffect_C9Emiss) e(teffect_C8Emiss) e(teffect_C7Emiss) ///
		e(teffect_C6Emiss) e(teffect_C5Emiss) e(teffect_C4Emiss) e(teffect_C3Emiss) ///
		e(teffect_C2Emiss) e(teffect_0Emiss) e(teffect_0Leak)

	* number of reps:
	local bs_reps = bs_reps

	******************************************
	* Conditional on zero baseline leakage:
	******************************************

	* Structure:
		* Sets up specification, e.g. local TM_leak_L0_1 twopm Tot_emiss_Leak5 ...
		* Runs the twopm estimation: twopm_treat_effects <treatment effect> <expectation if control> <expectation if treatment>, twopm_cmd(...)
		* Stores results, e.g., estimates store <estimate_name>
		* Runs bootstrapping
		* Adds bootstrapped results to the stored estimates above
	
	
	* Spec 1 L -- leakage with no controls:
	local TPM_leak_L0_1 twopm Tot_emiss_Leak5 Treatment_Ind  `if_L0', ///
			first(logit) second(glm, family(normal) link(log)) ///
			vce(cluster Operator_1)
	twopm_treat_effects te_leak_L0_1 ctr_leak_L0_1 trt_leak_L0_1, twopm_cmd(`TPM_leak_L0_1') twopm_if(`if_L0')
		local treat_effect_rounded = round(e(teffect_0Leak), 0.1)
		write_single_num `treat_effect_rounded' , file("Paper/Figures/single_numbers/treat_effect_leak_no_controls_zero_baseline_leakage_TWOPM.tex")
	estadd local has_controls ""
	estimates store twopm_leak_L0_1
	bootstrap `bs_e_list_L0', reps(`bs_reps') cluster(Operator_1): twopm_treat_effects, twopm_cmd(`TPM_leak_L0_1') twopm_if(`if_L0')
	return_boot_results twopm_leak_L0_1 , bs_e_list(`bs_e_list_L0')
	
	* Spec 2 L -- leakage with controls:
	local TPM_leak_L0_2 twopm Tot_emiss_Leak5 Treatment_Ind `added_controls_L0' `if_L0_nonmiss', ///
			first(logit) second(glm, family(normal) link(log)) ///
			vce(cluster Operator_1)
	twopm_treat_effects te_leak_L0_2 ctr_leak_L0_2 trt_leak_L0_2, twopm_cmd(`TPM_leak_L0_2') twopm_if(`if_L0_nonmiss')
		local treat_effect_rounded = round(e(teffect_0Leak), 0.1)
		write_single_num `treat_effect_rounded' , file("Paper/Figures/single_numbers/treat_effect_leak_w_controls_zero_baseline_leakage_TWOPM.tex")
	estadd local has_controls "Y"
	estadd local include_interactions "Y"
	estimates store twopm_leak_L0_2
	bootstrap `bs_e_list_L0', reps(`bs_reps') cluster(Operator_1): twopm_treat_effects, twopm_cmd(`TPM_leak_L0_2') twopm_if(`if_L0_nonmiss')
	return_boot_results twopm_leak_L0_2 , bs_e_list(`bs_e_list_L0')
	

	* Spec 1 E  -- emissions with no other controls:
	local TPM_emiss_L0_1 twopm Emissionskgd_5 Treatment_Ind `if_L0', ///
			first(logit) second(glm, family(normal) link(log)) ///
			vce(cluster Operator_1)
	twopm_treat_effects te_emiss_L0_1 ctr_emiss_L0_1 trt_emiss_L0_1, twopm_cmd(`TPM_emiss_L0_1') twopm_if(`if_L0')
		local treat_effect_rounded = round(e(teffect_0Leak), 0.1)
		write_single_num `treat_effect_rounded' , file("Paper/Figures/single_numbers/treat_effect_emiss_no_controls_zero_baseline_leakage_TWOPM.tex")
	estadd local has_controls ""
	estimates store twopm_emiss_L0_1
	bootstrap `bs_e_list_L0', reps(`bs_reps') cluster(Operator_1): twopm_treat_effects, twopm_cmd(`TPM_emiss_L0_1') twopm_if(`if_L0')
	return_boot_results twopm_emiss_L0_1 , bs_e_list(`bs_e_list_L0')
	
	* Spec 2 E -- emissions with controls:
	local TPM_emiss_L0_2 twopm (Emissionskgd_5 Treatment_Ind) (Emissionskgd_5 Treatment_Ind `added_controls_L0') `if_L0_nonmiss', ///
			first(logit) second(glm, family(normal) link(log)) ///
			vce(cluster Operator_1)
	twopm_treat_effects te_emiss_L0_2 ctr_emiss_L0_2 trt_emiss_L0_2, twopm_cmd(`TPM_emiss_L0_2') twopm_if(`if_L0_nonmiss')
		local treat_effect_rounded = round(e(teffect_0Leak), 0.1)
		write_single_num `treat_effect_rounded' , file("Paper/Figures/single_numbers/treat_effect_emiss_w_controls_zero_baseline_leakage_TWOPM.tex")
	estadd local has_controls "Y"
	estadd local include_interactions "Y"
	estimates store twopm_emiss_L0_2
	bootstrap `bs_e_list_L0', reps(`bs_reps') cluster(Operator_1): twopm_treat_effects, twopm_cmd(`TPM_emiss_L0_2') twopm_if(`if_L0_nonmiss')
	return_boot_results twopm_emiss_L0_2 , bs_e_list(`bs_e_list_L0')
	
	
	* Outputs a regression table:
	#delimit ;
	esttab twopm_leak_L0_1 twopm_leak_L0_2 twopm_emiss_L0_1 twopm_emiss_L0_2
		using "Paper/Figures/reg_zero_base_leakage_TWOPM.tex",
		keep(Treatment_Ind _cons) replace  obslast
			scalars(
			"teffect_0Leak Average treatment effect" 
			"bsci_teffect_0Leak 95\% confidence interval" 
			"has_controls Controls"
			"ll Log Likelihood" 
			"N_misreps Missed bootstrap reps")
		nostar nonotes label numbers se compress nogaps b(%8.2f) se(%8.2f) 
		mtitles("leakage" "leakage" "emissions" "emissions");
	#delimit cr

	
	* Outputs a fragment version:
	#delimit ;
	esttab twopm_leak_L0_1 twopm_leak_L0_2 twopm_emiss_L0_1 twopm_emiss_L0_2
		using "Paper/Figures/reg_zero_base_leakage_TWOPM_fragment.tex",
		drop(Treatment_Ind _cons `added_controls_L0') replace  obslast
			scalars(
			"teffect_0Leak Average treatment effect" 
			"bsci_teffect_0Leak 95\% confidence interval" 
			"has_controls \hline Controls"
			"ll \hline Log Likelihood" 
			/* "N_misreps Missed bootstrap reps" */ )
		nostar nonotes label numbers se compress nogaps b(%8.2f) se(%8.2f) 
		mtitles("leakage" "leakage" "emissions" "emissions") fragment;
	#delimit cr
	
	* Outputs a full version with all covariates:
	#delimit ;
	esttab twopm_leak_L0_1 twopm_leak_L0_2 twopm_emiss_L0_1 twopm_emiss_L0_2
		using "Paper/Figures/reg_zero_base_leakage_TWOPM_detail.tex",
		   replace  obslast
			scalars(
			"teffect_0Leak Average treatment effect" 
			"bsci_teffect_0Leak 95\% confidence interval" 
			"has_controls Controls"
			"ll Log Likelihood" 
			"N_misreps Missed bootstrap reps")
		nostar nonotes label numbers se compress nogaps b(%8.2f) se(%8.2f) 
		mtitles("leakage" "leakage" "emissions" "emissions");
	#delimit cr
	

	
	***************
	* Outputs methane social cost numbers:
	* 

	estimates restore reg_1_zOLS
	gen     leak_estimates = _b[Treatment_Ind] in 1
	estimates restore reg_2_zOLS
	replace leak_estimates = _b[Treatment_Ind] in 2
	estimates restore twopm_leak_L0_1 // we'l ignore this one
	estimates restore twopm_leak_L0_2
	replace leak_estimates = e(teffect_0Leak) in 3

	estimates restore reg_3_zOLS
	gen     emiss_estimates = _b[Treatment_Ind] in 1
	estimates restore reg_4_zOLS
	replace emiss_estimates = _b[Treatment_Ind] in 2
	estimates restore twopm_emiss_L0_1 // we'l ignore this one
	estimates restore twopm_emiss_L0_2
	replace emiss_estimates = e(teffect_0Leak) in 3
	
	qui sum leak_estimates, detail
	local median_leak_effect_round = round(r(p50))
	local median_leak_effect_DOLLARS = round(r(p50)*social_cost_methane_v2)
	write_single_num `median_leak_effect_round', file("Paper/Figures/single_numbers/median_leak_effect_round.tex")
	write_single_num `median_leak_effect_DOLLARS', file("Paper/Figures/single_numbers/median_leak_effect_DOLLARS.tex")

	qui sum emiss_estimates, detail
	local median_emiss_effect_round = round(r(p50))
	local median_emiss_effect_DOLLARS = round(r(p50)*social_cost_methane_v2)
	write_single_num `median_emiss_effect_round', file("Paper/Figures/single_numbers/median_emiss_effect_round.tex")
	write_single_num `median_emiss_effect_DOLLARS', file("Paper/Figures/single_numbers/median_emiss_effect_DOLLARS.tex")
	
	
	
* Erases oos.dta:
erase "Data/Intermediate/oos.dta"


